import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw form-response export into a DataFrame.
df=pd.read_csv("CAC 2 (Responses) - Form Responses 1.csv")

# Peek at the first and last rows to sanity-check the load.
df.head()

df.tail()

# List the raw column headers (the full survey questions).
df.columns

Index(['Timestamp', 'Course', 'State', 'Gender',
       'Overall how would you rate your mental health?\n',
       'Is there any history of mental health disorder in your family?\n',
       'On average, how many hours do you sleep per day?\n',
       'Have you ever used the counselling service provided by the college?',
       'Family Structure',
       'Is this the first time you're staying away from home?',
       'How is prayer and meditation important in your daily life?',
       'On average, how stressed are you? ',
       'How long have you been in Lavasa?',
       'What strategies do you use to cope with stress and anxiety in your daily life?'],
      dtype='object')

# The timestamp plays no part in the analysis, so discard it first.
df.drop(columns=["Timestamp"], inplace=True)

# Swap the long survey questions for short labels (positional, same order as the CSV).
df.columns = [
    "Course",
    "State",
    "Gender",
    "Mental Health Rating",
    "Family History",
    "Sleep Duration",
    "Counselling Service Usage",
    "Family Structure",
    "First Time Away From Home",
    "Importance of Prayer,Meditation",
    "Stress Score",
    "Length of Stay",
    "Coping Strategies",
]

# "Other" answers are beyond the scope of the study: blank them out so that
# the dropna() below removes those rows.
for scoped_column in ("State", "Course"):
    df[scoped_column] = df[scoped_column].replace("Other", np.nan)

# Abbreviate the longest course name for ease of plotting.
df["Course"] = df["Course"].replace("MSc Global Finance and Analytics", "Msc GFA")

df.dropna(inplace=True)
df.head(5)

# (rows, columns) of the frame at this point.
df.shape

(303, 13)

# Per-column quality summary: number of missing values and number of distinct values.
count_df = pd.DataFrame(
    {
        "Null Values": df.isnull().sum(),
        "Unique Values": df.nunique(),
    },
    index=df.columns,
)
count_df

# Total number of cells (rows * columns).
df.size

3939

# Storage dtype of every column.
df.dtypes

Course                             object
State                              object
Gender                             object
Mental Health Rating                int64
Family History                     object
Sleep Duration                     object
Counselling Service Usage          object
Family Structure                   object
First Time Away From Home          object
Importance of Prayer,Meditation     int64
Stress Score                        int64
Length of Stay                     object
Coping Strategies                  object
dtype: object

# Print the index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 303 entries, 0 to 314
Data columns (total 13 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   Course                           303 non-null    object
 1   State                            303 non-null    object
 2   Gender                           303 non-null    object
 3   Mental Health Rating             303 non-null    int64 
 4   Family History                   303 non-null    object
 5   Sleep Duration                   303 non-null    object
 6   Counselling Service Usage        303 non-null    object
 7   Family Structure                 303 non-null    object
 8   First Time Away From Home        303 non-null    object
 9   Importance of Prayer,Meditation  303 non-null    int64 
 10  Stress Score                     303 non-null    int64 
 11  Length of Stay                   303 non-null    object
 12  Coping Strategies                303 non-null    object
dtypes: int64(3), object(10)
memory usage: 33.1+ KB

# Summary statistics; with default arguments only the numeric columns are described.
df.describe()

# (rows, columns) of the frame at this point.
df.shape

(303, 13)

df.head()

# Persist the cleaned data. index=False keeps the (non-contiguous) row labels
# out of the file; otherwise every later read_csv() would pick them up as a
# spurious "Unnamed: 0" column.
df.to_csv("cleaned_data.csv", index=False)

# Reload from disk so the following analysis starts from the saved snapshot.
df = pd.read_csv("cleaned_data.csv")

import seaborn as sns
import matplotlib.pyplot as plt

# Bucket the 1-5 stress scores into three labelled bands in a single pass.
stress_bands = {
    1: "Low_stress",
    2: "Low_stress",
    3: "Moderate_stress",
    4: "High_stress",
    5: "High_stress",
}
df["Stress Score"] = df["Stress Score"].replace(stress_bands)
df["Course"] = df["Course"].replace("MSc Global Finance and Analytics", "Msc GFA")

# Grouped bars: one group per course, one bar per stress band, heights in percent.
plt.figure(figsize=(12, 6))
sns.countplot(x='Course', hue='Stress Score', data=df, palette='dark:red', stat="percent")
# The x axis carries the courses (stress is encoded by hue), so label it accordingly.
plt.xlabel('Course')
plt.ylabel('Percent')
plt.title('Comparison of stress over the courses')
plt.xticks(rotation=45)
plt.legend(title='Legend')
plt.show()

# Start again from the saved snapshot so both scores are numeric.
df = pd.read_csv("cleaned_data.csv")
plt.figure(figsize=(10, 5))

# Jittered points: mental health rating on x, stress score on y.
ax = sns.stripplot(
    data=df,
    x="Mental Health Rating",
    y="Stress Score",
    size=5,
    jitter=0.2,
    alpha=0.7,
)
ax.set_title("Mental Health Rating Vs Stress Score")
ax.set_xlabel("Mental Health Rating")
ax.set_ylabel("Stress Score")
plt.show()
plt.close()

# Collapse the 1-5 self-rating into three named bands, one score at a time.
for rating, band in (
    (1, "Low_Mental_Health"),
    (2, "Low_Mental_Health"),
    (3, "Average_Mental_Health"),
    (4, "Good_Mental_Health"),
    (5, "Good_Mental_Health"),
):
    df["Mental Health Rating"] = df["Mental Health Rating"].replace(rating, band)

# Frequency of each band as a two-column frame, least common first.
mental_health_count = df.value_counts(["Mental Health Rating"])
mental_health_frame = (
    mental_health_count.to_frame()
    .reset_index()
    .sort_values(by="count", ascending=True)
)
mental_health_frame

label_data = mental_health_frame["Mental Health Rating"]
count_data = mental_health_frame["count"]

# Pie chart of the band shares, with white separators between wedges.
plt.pie(
    count_data,
    labels=label_data,
    autopct='%1.1f%%',
    wedgeprops={'linewidth': 2, 'edgecolor': 'white'},
    colors=["red", "darkgreen", "hotpink", "purple", "orange"],
    startangle=0,
)
plt.title("Mental Health Rating among Students")
plt.show()

# Collapse the 1-5 stress score into three named bands, one score at a time.
for score, band in (
    (1, "Low_stress"),
    (2, "Low_stress"),
    (3, "Moderate_stress"),
    (4, "High_stress"),
    (5, "High_stress"),
):
    df["Stress Score"] = df["Stress Score"].replace(score, band)

# Frequency of each band as a two-column frame, least common first.
stress_count = df.value_counts(["Stress Score"])
stress_frame = (
    stress_count.to_frame()
    .reset_index()
    .sort_values(by="count", ascending=True)
)
stress_frame

label_data = stress_frame["Stress Score"]
count_data = stress_frame["count"]

# Pie chart of the band shares, with white separators between wedges.
plt.pie(
    count_data,
    labels=label_data,
    autopct='%1.1f%%',
    wedgeprops={'linewidth': 2, 'edgecolor': 'white'},
    colors=["red", "darkgreen", "hotpink", "purple", "orange"],
    startangle=0,
)
plt.title("Stress Score among Students")
plt.show()

import plotly.express as px

# Count the respondents in every (length of stay, stress) combination and
# flatten the result into a plain frame with a "count" column.
df2 = df.value_counts(["Length of Stay", "Stress Score"])
df3 = df2.to_frame().reset_index()

# NOTE: rendering the figure may need nbformat (%pip install nbformat).

# Bubble chart: bubble area is proportional to the number of respondents.
fig = px.scatter(
    data_frame=df3,
    x="Stress Score",
    y="Length of Stay",
    size="count",
    size_max=60,
)
fig.update_layout(title_text="Length of stay Vs Stress", showlegend=True)

fig.show()

# Jittered points of mental health rating, split by counselling usage.
ax = sns.stripplot(
    data=df,
    x="Counselling Service Usage",
    y="Mental Health Rating",
    size=3,
    jitter=0.3,
)
ax.set_xlabel('Utilization of Counselling')
ax.set_ylabel('Mental Health Rating')
ax.set_title("Mental Health Rating Vs Counselling Usage")
ax.grid(True)
plt.show()

plt.figure(figsize=(5, 5))

# Percentage bars per mental health rating, coloured by counselling usage.
ax = sns.countplot(
    data=df,
    x="Mental Health Rating",
    hue="Counselling Service Usage",
    palette="dark",
    stat="percent",
)
ax.set_title("Mental Health Rating and using of Counselling service")
plt.xticks(rotation=10)
plt.show()

# Collapse the 1-5 importance score into three named bands, one score at a time.
for score, band in (
    (1, "Low_Importance"),
    (2, "Low_Importance"),
    (3, "Moderate_Importance"),
    (4, "High_Importance"),
    (5, "High_Importance"),
):
    df["Importance of Prayer,Meditation"] = df["Importance of Prayer,Meditation"].replace(score, band)

# Percentage bars per stress value, coloured by the importance band.
ax = sns.countplot(
    data=df,
    x="Stress Score",
    hue="Importance of Prayer,Meditation",
    stat="percent",
)
sns.despine(top=True)
ax.set_title("How Prayer, Meditation affects Stress")
plt.show()

# Start again from the saved snapshot so the scores are numeric (1-5).
df = pd.read_csv("cleaned_data.csv")

# Keep only the respondents who answered "No" to using the counselling service.
g1 = df.set_index("Counselling Service Usage")
used_g_form = g1.loc[["No"]]

# Histogram with one unit-wide bin centred on each score 1..5.
# The title and axis label describe the stress score, so plot that column
# (the earlier version plotted "Mental Health Rating" under a stress label).
plt.figure(figsize=(5, 5))
plt.hist(
    used_g_form["Stress Score"],
    edgecolor="white",
    bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5],
    color="red",
)
plt.xlabel("Stress Score")
plt.ylabel("No. of Students not using Counselling Services")
plt.title("Stress Score among students not using Counselling service")
plt.show()

# Collapse the four sleep-duration answers into two bands.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on df["Sleep Duration"] mutates a temporary selection, which raises a
# FutureWarning on pandas 2.x and leaves df unchanged under Copy-on-Write.
sleep_bands = {
    "Less than 4 hours": "Less than 6 hr",
    "4-6 hours": "Less than 6 hr",
    "7-8 hours": "6hr or more",
    "More than 8 hours": "6hr or more",
}
df["Sleep Duration"] = df["Sleep Duration"].replace(sleep_bands)

# Bars per mental health rating, coloured by sleep band.
plt.figure(figsize=(5, 5))
sns.countplot(hue="Sleep Duration", x="Mental Health Rating", data=df, palette="pastel")
sns.despine(top=True)
plt.title("Sleep Duration Vs Mental Health Rating")

plt.show()

plt.figure(figsize=(5, 5))

# Bars per stress score, coloured by sleep duration.
ax = sns.countplot(
    data=df,
    x="Stress Score",
    hue="Sleep Duration",
    palette="bright",
)
sns.despine(top=True, right=True)
ax.set_title("Sleep Duration and Stress")
plt.show()

# df["Sleeping"]=df["Sleeping"].replace("7-8 hours","good")
# df["Sleeping"]=df["Sleeping"].replace("More than 8 hours","good")
# df["Sleeping"]=df["Sleeping"].replace("4-6 hours","modarate")
# df["Sleeping"]=df["Sleeping"].replace("Less than 4 hours","bad")
# Jittered points: sleep duration on x, course on y, coloured by gender.
ax = sns.stripplot(
    data=df,
    x="Sleep Duration",
    y="Course",
    hue="Gender",
    size=4,
    jitter=0.4,
    alpha=0.8,
)
ax.set_xlabel('Sleep')
ax.set_ylabel('Courses')
ax.set_title('Influence of Course on Sleep Duration')
ax.grid(True)
plt.xticks(rotation=45, fontsize=8)
plt.show()

# Jittered points of mental health rating, one column per gender.
ax = sns.stripplot(
    data=df,
    x="Gender",
    y="Mental Health Rating",
    size=3,
    jitter=0.3,
    alpha=0.5,
)
ax.set_xlabel('Gender')
ax.set_ylabel("Mental Health Rating")
ax.set_title('Gender Vs Mental Health Rating ')
plt.show()

# Jittered points of stress score, one column per gender.
ax = sns.stripplot(
    data=df,
    x="Gender",
    y="Stress Score",
    size=3,
    jitter=0.3,
    alpha=0.5,
)
ax.set_xlabel('Gender')
ax.set_ylabel("Stress Score")
ax.set_title('Gender Vs Stress Level ')
plt.show()

# Jittered points of stress score, split by whether this is the respondent's
# first time staying away from home.
sns.stripplot(x=df["First Time Away From Home"], y=df["Stress Score"], size=3, jitter=.3, alpha=0.5)
# Labels follow the plotted data: the yes/no answer is on x and the score on y
# (they were previously swapped).
plt.xlabel('Staying away from home for first time?')
plt.ylabel('Stress Score')
plt.title('Effect of staying away from home for first time')
plt.show()

# Collapse the 1-5 self-rating into three named bands, one score at a time.
for rating, band in (
    (1, "low_mentalhealth"),
    (2, "low_mentalhealth"),
    (3, "moderate_mentalhealth"),
    (4, "high_mentalhealth"),
    (5, "high_mentalhealth"),
):
    df["Mental Health Rating"] = df["Mental Health Rating"].replace(rating, band)

# Count respondents per (family structure, band); absent combinations become 0.
group_sizes = df.groupby(['Family Structure', 'Mental Health Rating']).size()
pivot_table = group_sizes.unstack(fill_value=0)

# Annotated heatmap of those counts.
plt.figure(figsize=(12, 6))
ax = sns.heatmap(pivot_table, annot=True, cmap='viridis', fmt='d')
ax.set_xlabel('mental health')
ax.set_ylabel('family type')
ax.set_title('Correlation between family type and mental health')
plt.show()

# Drop respondents whose gender is "Other" (via NaN + dropna) before the comparison.
df['Gender'] = df['Gender'].replace("Other", np.nan)
df.dropna(inplace=True)

# Coping strategy answers split by gender.
male_data = df.loc[df['Gender'] == 'Male']
female_data = df.loc[df['Gender'] == 'Female']
male_satisfaction = male_data["Coping Strategies"]
female_satisfaction = female_data["Coping Strategies"]

# Two histograms on the same axes; bars are left-aligned for males and
# right-aligned for females.
plt.figure(figsize=(10, 4))
for group_label, answers, bar_align in (
    ('Male', male_satisfaction, 'left'),
    ('Female', female_satisfaction, 'right'),
):
    plt.hist(answers, bins=30, alpha=1, label=group_label, align=bar_align)
plt.xlabel('escape method from stress')
plt.ylabel('Frequency')
plt.title('Analysis of Coping Strategies')
plt.legend()
plt.show()

	Mental Health Rating	Importance of Prayer,Meditation	Stress Score
count	303.000000	303.000000	303.000000
mean	3.069307	3.224422	3.587459
std	1.144443	1.202562	1.147171
min	1.000000	1.000000	1.000000
25%	2.000000	2.000000	3.000000
50%	3.000000	3.000000	4.000000
75%	4.000000	4.000000	4.000000
max	5.000000	5.000000	5.000000

	Timestamp	Course	State	Gender	Overall how would you rate your mental health?\n	Is there any history of mental health disorder in your family?\n	On average, how many hours do you sleep per day?\n	Have you ever used the counselling service provided by the college?	Family Structure	Is this the first time you're staying away from home?	How is prayer and meditation important in your daily life?	On average, how stressed are you?	How long have you been in Lavasa?	What strategies do you use to cope with stress and anxiety in your daily life?
0	10/10/2023 21:44:21	MSc DS	Kerala	Male	4	No	4-6 hours	No	Joint Family	Yes	4	4	Less than 6 months	Meditation
1	10/10/2023 22:14:11	MSc DS	Kerala	Female	2	No	Less than 4 hours	No	Nuclear Family	No	3	4	Less than 6 months	Listening music
2	10/10/2023 22:15:16	MSc DS	Uttar Pradesh	Female	4	No	4-6 hours	No	Nuclear Family	No	3	4	Less than 6 months	Listening music
3	10/10/2023 22:19:15	MSc DS	Kerala	Female	2	No	7-8 hours	Yes	Nuclear Family	No	3	4	Less than 6 months	Sleeping
4	10/10/2023 22:28:00	MSc DS	Other	Male	3	No	4-6 hours	No	Nuclear Family	No	5	4	Less than 6 months	Sports

	Timestamp	Course	State	Gender	Overall how would you rate your mental health?\n	Is there any history of mental health disorder in your family?\n	On average, how many hours do you sleep per day?\n	Have you ever used the counselling service provided by the college?	Family Structure	Is this the first time you're staying away from home?	How is prayer and meditation important in your daily life?	On average, how stressed are you?	How long have you been in Lavasa?	What strategies do you use to cope with stress and anxiety in your daily life?
310	10/24/2023 14:33:02	MBA	Kerala	Male	4	No	Less than 4 hours	No	Nuclear Family	Yes	3	4	Less than 6 months	Sleeping
311	10/24/2023 14:33:26	MBA	Kerala	Female	4	No	Less than 4 hours	No	Joint Family	No	3	4	Less than 6 months	Listening music
312	10/24/2023 14:35:22	MBA	Kerala	Female	4	No	Less than 4 hours	Yes	Joint Family	No	2	5	Less than 6 months	Sleeping
313	10/24/2023 14:36:03	MBA	Tamil Nadu	Female	4	No	Less than 4 hours	No	Joint Family	No	3	4	Less than 6 months	Listening music
314	10/24/2023 14:36:55	MBA	West Bengal	Male	4	No	4-6 hours	No	Joint Family	Yes	4	4	Less than 6 months	Sports

	Null Values	Unique Values
Course	0	13
State	0	27
Gender	0	3
Mental Health Rating	0	5
Family History	0	2
Sleep Duration	0	4
Counselling Service Usage	0	2
Family Structure	0	2
First Time Away From Home	0	2
Importance of Prayer,Meditation	0	5
Stress Score	0	5
Length of Stay	0	3
Coping Strategies	0	7

	Mental Health Rating	count
2	Average_Mental_Health	74
1	Low_Mental_Health	110
0	Good_Mental_Health	119

	Stress Score	count
2	Low_stress	55
1	Moderate_stress	71
0	High_stress	177